import json
import re
import time
import sys
import requests

from config import getConfigParams
from requestUrl import getDocByUrl,getEntryIdByUrl
from formatContent import formatContent, filterTag
from logs import saveLog
from pyquery import PyQuery as pq

# Read the run-time configuration from the command-line arguments.
configParams = getConfigParams(sys.argv)
url = configParams['real_url']
env = configParams['env']

# Resolve the article ID from the configured base URL.
entryId = getEntryIdByUrl(configParams['base_url'])

headers = {
    'referer': 'https://www.guancha.cn',
}
# Fetch the page HTML and wrap it in a PyQuery object.
# requests applies no timeout unless one is given, so an unresponsive server
# would block this script forever; pass explicit (connect, read) limits so a
# stalled fetch fails with a requests exception instead of hanging.
response = requests.get(url=url, headers=headers, timeout=(10, 30))
# Decode the body as UTF-8 regardless of what the response headers declare.
response.encoding = 'utf-8'
doc = pq(response.text)

# Title: text of the page's <title> element.
title = doc('title').text()

# Article body: HTML of the element matched by `.content .all-txt`.
# NOTE(review): PyQuery's .html() yields None when nothing matches, so
# formatContent may receive None here — confirm that it tolerates that.
articleContent = doc('.content .all-txt').html()

# Author: text of the third <span> under `.time`.
# .eq(2) gives an empty selection (whose text is '') when the page has fewer
# than three such spans, where plain indexing with [2] raised IndexError.
authorName = doc('.time span').eq(2).text()
# Keep only what follows the first full-width colon; when there is no colon,
# find() returns -1 and the slice starts at 0, keeping the whole string.
authorName = authorName[authorName.find('：') + 1:]

# Filter tags out of the body via formatContent (per the original comment,
# that helper belongs to Li Shiquan).
content = formatContent(articleContent, env=env)

# Assemble the scraped fields into the output record. This script always
# emits cover_url as '' and public_time as 0.
result = dict(
    entry_id=entryId,
    author_name=authorName,
    cover_url='',
    public_time=0,
    title=title,
    not_format_content=articleContent,
    content=content,
)
# The log entry pairs the raw command-line arguments with the record.
logData = dict(params=sys.argv, result=result)

# Write the log entry first, then print the record to stdout as JSON.
# ensure_ascii=False keeps non-ASCII characters unescaped in both outputs.
logLine = json.dumps(logData, ensure_ascii=False)
saveLog(logLine)
resultLine = json.dumps(result, ensure_ascii=False)
print(resultLine)
